/**************************************************************************
	Replication do-file: "Corruption in Customs"
	Cyril Chalendard, Ana Fernandes, Gael Raballand and Bob Rijkers
	
	Created on: 01/07/2022
**************************************************************************/

clear all
set more off, perm
cap log close
cls

* ----------------------- DIRECTORIES AND FOLDERS ----------------------- *

* Directories in which data are stored
global main "PUT YOUR DIRECTORY PATH HERE"
cd "$folder"

* Folders
global outputdata = "$main\Output Data"
cap mkdir "$main\Tables"
global tables = "$main\Tables"

* ----------------------------- BEGINS HERE ----------------------------- *

* -------
* Dataset
* -------
use "$outputdata\CFRR.dta", clear // open dataset

* Sample
keep if sample_int==1 & eis_f!=. // keep observations of interest

* New variables
** Indicators of tax revenue loss
*** Revenue losses over declarations with significant excess interaction - each of the numbers entering the formulas is a coefficient from panel B of Table A22
gen sli = cond((sample_reg==1 & eis_f!=. & sig_c_f==1), (tot_taxes_usd*exp(0.732*eis_f) - tot_taxes_usd), cond((sample_reg==1 & eis_f!=.), 0, .))
gen sliw = cond((sample_reg==1 & eis_f!=. & sig_c_f==1), (tot_taxes_usd*exp(1.112*eis_f) - tot_taxes_usd), cond((sample_reg==1 & eis_f!=.), 0, .))
gen sle = cond((sample_reg==1 & eis_f!=. & sig_c_f==1), (tot_taxes_usd*exp(1.659*eis_f) - tot_taxes_usd), cond((sample_reg==1 & eis_f!=.), 0, .))
gen slew = cond((sample_reg==1 & eis_f!=. & sig_c_f==1), (tot_taxes_usd*exp(2.085*eis_f) - tot_taxes_usd), cond((sample_reg==1 & eis_f!=.), 0, .))
gen slvt = cond((sample_reg==1 & eis_f!=. & sig_c_f==1), (tot_taxes_usd*exp(0.851*eis_f) - tot_taxes_usd), cond((sample_reg==1 & eis_f!=.), 0, .))
*** After delegated randomization - each of the numbers entering the formulas is a coefficient from Table A23
gen post_sli = cond((sample_int==1 & wfr!=. & eis_f!=. & sig_c_f==1), (exp(0.146*wfr + 0.043*eis_f + 1.279*wfr_eis_f)*tot_taxes_usd - tot_taxes_usd), cond((sample_int==1 & wfr!=. & eis_f!=.), 0, .))
gen post_sle = cond((sample_int==1 & wfr!=. & eis_f!=. & sig_c_f==1), (exp(0.267*wfr + eis_f*0.021 + wfr_eis_f*3.328)*tot_taxes_usd - tot_taxes_usd), cond((sample_int==1 & wfr!=. & eis_f!=.), 0, .))
gen post_slvt = cond((sample_int==1 & wfr!=. & eis_f!=. & sig_c_f==1), (exp(0.024*wfr + eis_f*0.184 + wfr_eis_f*0.479)*tot_taxes_usd - tot_taxes_usd), cond((sample_int==1 & wfr!=. & eis_f!=.), 0, .))
label var post_sli "Extra tax yield no corruption - price adjusted (internal) wfr"
label var post_sle "Extra tax yield no corruption - price adjusted (external) wfr"
label var post_slvt "Extra tax yield no corruption - valitrade price wfr"


* ---------
* Table A24
* ---------

* Panel A
* -------

* Matrix
mat tableA24_1 = J(8,4,.)
mat tableA24_1P1 = J(8,1,.)
mat tableA24_1P2 = J(8,1,.)

* Estimations
** Column 1: Per declaration with significant excess interaction
qui: tabstat tot_taxes_usd sli sliw sle slew slvt if nc_sem<11 & sample_reg==1 & eis_f!=. & sig_c_f==1 , stats(mean) save // mean of counterfactuals
mat mat1 = r(StatTotal)
mat tableA24_1[1,1] = mat1[1,1] // actual tax yield
local j = 2
forvalues i = 4(1)8 {
	mat tableA24_1[`i',1] = mat1[1,`j']
	local j = `j'+1
}
** Column 2: Percentage
local j = 2
forvalues i = 4(1)8 {
	mat tableA24_1P1[`i',1] = mat1[1,`j']/mat1[1,1]
	local j = `j'+1
} // Loop: each value in column 1 divided by the actual tax yield
** Column 3: Per declaration (any)
qui: tabstat tot_taxes_usd sli sliw sle slew slvt if nc_sem<11 & sample_reg==1 & eis_f!=. , stats(mean) save // mean of counterfactuals
mat mat2 = r(StatTotal)
mat tableA24_1[1,3] = mat2[1,1] // actual tax yield
local j = 2
forvalues i = 4(1)8 {
	mat tableA24_1[`i',3] = mat2[1,`j']
	local j = `j'+1
}
** Column 4: Percentage
local j = 2
forvalues i = 4(1)8 {
	mat tableA24_1P2[`i',1] = mat2[1,`j']/mat2[1,1]
	local j = `j'+1
} // Loop: each value in column 1 divided by the actual tax yield


* Panel B
* -------

* Matrix
mat tableA24_2 = J(6,4,.)
mat tableA24_2P1 = J(6,1,.)
mat tableA24_2P2 = J(6,1,.)

* Estimations
** Column 1: Significant excess interaction and withheld
qui: tabstat tot_taxes_usd post_sli post_sle post_slvt if sample_int==1 & eis_f!=. & sig_c_f==1 & wfr==1 , stats(mean) save // mean of counterfactuals
mat mat3 = r(StatTotal)
mat tableA24_2[1,1] = mat3[1,1] // actual tax revenue
local j = 2
forvalues i = 4(1)6 {
	mat tableA24_2[`i',1] = mat3[1,`j']
	local j = `j'+1
}
** Column 2: Percentage
local j = 2
forvalues i = 4(1)6 {
	mat tableA24_2P1[`i',1] = mat3[1,`j']/mat3[1,1]
	local j = `j'+1
} // Loop: each value in column 1 divided by the actual tax revenue
** Column 3: Per declaration (any)
qui: tabstat tot_taxes_usd post_sli post_sle post_slvt if sample_int==1 & wfr!=. & eis_f!=. , stats(mean) save // mean of counterfactuals
mat mat4 = r(StatTotal)
mat tableA24_2[1,3] = mat4[1,1] // actual tax revenue
local j = 2
forvalues i = 4(1)6 {
	mat tableA24_2[`i',3] = mat4[1,`j']
	local j = `j'+1
}
** Column 4: Percentage
local j = 2
forvalues i = 4(1)6 {
	mat tableA24_2P2[`i',1] = mat4[1,`j']/mat4[1,1]
	local j = `j'+1
} // Loop: each value in column 1 divided by the actual tax revenue


* Export
* ------
putexcel set "$tables\Table A24.xlsx", replace sheet(TableA24) // create a new excel

* Titles
local cells `" "B" "D" "'
forvalues i = 1/2 {
	local x: word `i' of `cells'
	putexcel `x'1 = "Tax yield per declaration (average)", hcenter vcenter bold
}
local cells `" "C" "E" "'
forvalues i = 1/2 {
	local x: word `i' of `cells'
	putexcel `x'1 = "% Counterfactual increase without corruption", hcenter vcenter bold
}

* Panel A
** Name
putexcel (A3:E3), merge hcenter vcenter bold
putexcel A3 = "A. Before Delegated Randomization of Inspector Assignment"
* Dependent variables
putexcel (B4:C4), merge hcenter vcenter
putexcel B4 = "Declarations with significant excess interaction"
putexcel (D4:E4), merge hcenter vcenter
putexcel D4 = "All declarations"
** Coefficients
putexcel B5 = matrix(tableA24_1), nformat(currency_negbra) hcenter vcenter
putexcel C5 = matrix(tableA24_1P1), nformat(0.0%) hcenter vcenter
putexcel E5 = matrix(tableA24_1P2), nformat(0.0%) hcenter vcenter
** Independent variables
putexcel A5 = "Actual tax yield"
local j = 7
local labels `" "Counterfactual tax yield without corruption, based on:" "internal reference price" "internal reference price and measured weight" "external reference price" "external reference price and measured weight" "third party valuation advice" "'
forvalues i = 1(1)6 {

	local x: word `i' of `labels'

	putexcel A`j' = "`x'", left vcenter
	local j = `j'+1
}

* Panel B
** Name
putexcel (A14:E14), merge hcenter vcenter bold
putexcel A14 = "B. During Delegated Randomization of Inspector Assignment"
* Dependent variables
putexcel (B15:C15), merge hcenter vcenter
putexcel B15 = "Declarations with significant excess interaction that were withheld from randomization"
putexcel (D15:E15), merge hcenter vcenter
putexcel D15 = "All declarations"
** Coefficients
putexcel B16 = matrix(tableA24_2), nformat(currency_negbra) hcenter vcenter
putexcel C16 = matrix(tableA24_2P1), nformat(0.0%) hcenter vcenter
putexcel E16 = matrix(tableA24_2P2), nformat(0.0%) hcenter vcenter
** Independent variables
putexcel A16 = "Actual tax revenue"
local j = 18
local labels `" "Additional counterfactual tax yield without corruption, based on:" "internal reference price" "external reference price" "third party valuation advice" "'
forvalues i = 1(1)4 {

	local x: word `i' of `labels'

	putexcel A`j' = "`x'", left vcenter
	local j = `j'+1
}


* -------------------------------- ENDS HERE -------------------------------- *